Preprocessing

# Packages
#library("CyTOF") # Need to adjust name
library("devtools")
library(ruv)
library(rsvd)
load_all() # Load my package
library("cytofkit")
library("gridExtra")
library("ggpubr")
library("tidyverse")
library("FlowSOM")
library("RColorBrewer")
# Takes list of plots, character vector of titles
add_titles <- function(plots, titles){
  map2(plots, titles, function(x, y) x + labs(titles = y))
}
# NB: These could well break if ggplot2 is updated
get_x_scale <- function(plot){
  ggplot_build(plot)$layout$panel_params[[1]]$x.range
}

set_x_scale <- function(plot, x_limits){
  plot + coord_cartesian(xlim = x_limits)
}
set.seed(42)
titles <- params$titles
# Get the full file names
raw_files <- paste0(here::here("Data", "/"), params$raw_files)

if (!is.null(other_files)){
  other_files <- paste0(here::here("Data", "/"), params$other_files)
  files <- c(raw_files, other_files)
} else {
  files <- raw_files
}

# Read in the files
n_raw_files <- length(raw_files)
data_list <- map(files, ~readRDS(.))
n_data <- length(data_list)
samples <- params$samples
# Change to sensible input for debugging purposes
# This is caused by my inability to pass vectors by default
if (samples == 1){
  samples <- c("1B1", "2B1", "3B1", "4B1", "5B1", "6B1")
}
## Warning in if (samples == 1) {: the condition has length > 1 and only the
## first element will be used
data_list <- purrr::map(data_list, ~ filter(., sample %in% samples))
samples <- params$samples
n_clust <- max(data_list[[1]]$cluster) # Get the clustering 
norm_clus <- params$norm_clusters
k <- params$k

for(i in 1:n_raw_files){
  norm_data <- normalise_data(data_list[[i]], norm_clus, k, num_clusters = n_clust)
  data_list[[length(data_list) + 1]] <- norm_data
  n_data <- length(data_list)
}
# Perform tSNE on the data
tsne_list <- map(data_list, function(x) compute_tsne(x, N = 5000))
##   Running t-SNE...with seed 42  DONE
##   Running t-SNE...with seed 42  DONE
##   Running t-SNE...with seed 42  DONE

PCA

plot_pca_samp <- map(data_list, ~plot_scpca_samp(., N = 2000))
plot_pca_samp <- flatten(plot_pca_samp)
plot_pca_samp <- add_titles(plot_pca_samp, rep(titles, each = 3))
ggarrange(plotlist = plot_pca_samp, ncol = 3, nrow = 3, common.legend = TRUE, legend = "right")

t-SNE

t-SNE coloured by sample

plot_tsne_samp <- map(tsne_list, plot_tsne_sample)
plot_tsne_samp <- add_titles(plot_tsne_samp, titles)
ggarrange(plotlist = plot_tsne_samp, ncol = 2, nrow = 2, common.legend = TRUE, legend = "right")

F statistics

plot_Ftsne <- map(tsne_list, tsne_F_stats)
# Ensure the scales of the plots are the same 
# NB: assumes the first plot will have the largest range
raw_scale <- get_x_scale(plot_Ftsne[[1]][[1]])
for(i in 1:n_data){
  plot_Ftsne[[i]][[1]] <- set_x_scale(plot_Ftsne[[i]][[1]], raw_scale)
}
plot_Ftsne <- flatten(plot_Ftsne)
ggarrange(plotlist = plot_Ftsne, ncol = 2, nrow = 3)

t-SNE coloured by cluster

plot_tsne_clus <- map(tsne_list, plot_tsne_cluster)
plot_tsne_clus <- add_titles(plot_tsne_clus, titles)
ggarrange(plotlist = plot_tsne_clus, ncol = 2, nrow = 2)

Cluster Frequency

clus_freq_plot <- map(data_list, plot_cluster_freq)
# Ensure the scales of the plots are the same 
# NB: assumes the first plot will have the largest range
raw_scale <- get_x_scale(clus_freq_plot[[1]][[2]])
for(i in 1:n_data){
  clus_freq_plot[[i]][[2]] <- set_x_scale(clus_freq_plot[[i]][[2]], raw_scale)
}
clus_freq_plot <- flatten(clus_freq_plot)
ggarrange(plotlist = clus_freq_plot, ncol = 2, nrow = 3)

Median marker intensity

median_exprs_plots <- map(data_list, plot_median_exprs)
median_exprs_plots <- add_titles(median_exprs_plots, titles)
ggarrange(plotlist = median_exprs_plots, ncol = 2, nrow = 2, legend = "bottom")

Marker Densities

marker_densities <- map(data_list, plot_marker_densities)
marker_densities <- add_titles(marker_densities, titles)
ggarrange(plotlist = marker_densities, ncol = 2, nrow = 2, common.legend = TRUE, legend = "bottom")

Cluster Matching

if (!is.null(other_files)){
  # The indexing should be changed
  clust_match_plot <- plot_cluster_match(data_list[[1]], data_list[[2]])
  clust_match_plot <- clust_match_plot + labs(x = titles[[1]], y = titles[[3]])
  clust_match_plot
}